*
*
* garch-mc-norm-experiment1
* 4/16/24
* normal resids, T = 250	
* saved files:
* dgp_scenario1_BASEPRED.dta
* dgp_scenario1_PBSPRED.dta
* dgp_scenario1_MEPRED.dta
* dgp_scenario1_RESIDPRED.dta 
* -----------------------------------------------------------------------
cd "~/Dropbox/Benton-Jordan-Philips/final-JOP-replication"

* run garch sim program:
do "scripts-programs/synthgarch.ado"



* -----------------------------------------------------------------------------
* -----------------------------------------------------------------------------
* ---------- SCENARIO 1 ----------------------
* -----------------------------------------------------------------------------
/*	ARCH = 0.1
	GARCH = 0.5
	T = 250
	beta_x = .5
	beta_z = -1
	constant = 0.5
	phi = 0.25
	x is continuous, z is dichotomous, epsilon is normal
	burnin = 250
	sims = 100
	scenname = scenario1
*/

loc dgp_sims = 500 // how many draws from the DGP are we taking?
loc rep_sims = 500 // how many reps OF a dgp_sim do we want to take?

* -----------------------------------------------------------------------
* -->	STEP 1: GENERATE SERIES. 
* this is done for all steps below
synthgarch, time(250) garchparam(0.5) ///
 archparam(0.1) xparam(.5) zparam(-1) consparam(0.5) ///
 phiparam(0.25) burnin(250) seedz(2000221) sims(`dgp_sims') scenname(scenario1)
* -----------------------------------------------------------------------

* -----------------------------------------------------------------------
* -->	STEP 2: RUN BASE GARCH MODEL
 * create predictions and save all M of these in order to get a 95% CI that we will compare with our bs techniques
use "dgp_scenario1.dta", clear
set seed 1245039932
tsset t
set obs 2000 // ensure we can store up to M results
* create blank variables to store the params:
gen normalitypval = .
gen garchterm = .
gen archterm = . 
gen hetx = .
gen hetz = .
gen constant = .

* now estimate M GARCH models, saving coefficients from HET eq:
forv i = 1/`dgp_sims' {
	* run garch
	cap arch y_`i' l.y_`i' x_`i' z_`i', arch(1) garch(1) het(x_`i' z_`i')
	
	if "`e(converged)'" == "1" {
		* a. perform a residual test (e.g., normality met?).
		predict res, residual
		sktest res
		replace normalitypval = r(p_chi2) in `i'
		drop res
	
		* store coefs:
		replace archterm = e(b)[1,8] in `i' // not sure if we need this
		replace garchterm = e(b)[1,9] in `i'
		replace hetx = e(b)[1,5] in `i'
		replace hetz = e(b)[1,6] in `i'
		replace constant = e(b)[1,7] in `i'
	}
	else {
		* do nothing, no convergence
	}
	
}

* with the M sims, create stable predictions. We'll set X = 1 and Z = 1, no ARCH effects and will burn-in to get a stable prediction. I don't see a need to do anything different than our generating new variables for each burnin since we're doing all M at once.

* we don't care about intermediate values so we just want a stable prediction say 100 time points in:
gen preds = exp(constant + hetx*1 + hetz*1) // init guess
forv i = 1/100 {
	replace preds = garchterm*preds + exp(constant + hetx*1 + hetz*1) + 0*archterm // replace w/ past value to achieve stable GARCH prediction
}
su preds, det // so this is our 95% CI and ave prediction. make UL and LL anyway along w/ median
gen pred_sd = r(sd)
_pctile preds, p(2.5, 50, 97.5)
gen pred_ll = r(r1)
gen pred_median = r(r2)
gen pred_ul = r(r3)

* only need to keep coefs and prediction
keep normalitypval - pred_ul
qui su normalitypval
keep in 1/`r(N)' // only keep actual # of sims
foreach var of varlist normalitypval - pred_ul {
	rename `var' `var'_BASE
}
gen sims = _n
compress
save "data/dgp_scenario1_BASEPRED.dta", replace
* -----------------------------------------------------------------------

* -----------------------------------------------------------------------
* -->	STEP 3: PARAMETRIC BOOTSTRAP
* technically could just use the models above. Yet unlike above we're creating 1000 predictions PER SIM. To keep things feasible for each sim we  record the following in long form (so length of this dataset is length of sims, not length of each bootstrap)
* estimate GARCH model on dataset m. take 1000 P-bs draws
* record ave beta's and SD. 
* use 1000 P-bs draws to create predictions. Take median and CIs of this. record them.
* thus we'll have an M-long matrix where each column is bar(beta), sd(beta), median(pred), ul(pred), ll(pred)
* and I think since we're clearing in order to use corr2data we should keep it all in a matrix
mat storage = J(`dgp_sims',14,.) // cols are:
* 1. estimated B-bar, ARCH term
* 2. estimated B-bar, GARCh term
* 3. estimated B-bar, Het X term
* 4. estimated B-bar, Het Z term
* 5. estimated B-bar, Het constant
* 6. estimated B-sd, ARCH term
* 7. estimated B-sd, GARCh term
* 8. estimated B-sd, Het X term
* 9. estimated B-sd, Het Z term
* 10. estimated B-sd, Het constant
* 11. median prediction
* 12. ll prediction
* 13. ul prediction
* 14. sd prediction

use "dgp_scenario1.dta", clear
set seed 1245039932
tsset t
set obs 2000 // ensure we can store up to M results


* now estimate M GARCH models, saving coefficients from HET eq:
forv i = 1/`dgp_sims' {
	* run garch
	cap arch y_`i' l.y_`i' x_`i' z_`i', arch(1) garch(1) het(x_`i' z_`i')
	
	if "`e(converged)'" == "1" {
		mat B = e(b)
		mat VCV = e(V)
	
		preserve
		clear
		corr2data junk1 junk2 junk3 junk4 hetx hetz constant archterm garchterm, n(`rep_sims') means(B) cov(VCV)
	
		* summarize and place means/sd
		qui su hetx
		mat storage[`i',3] = r(mean)
		mat storage[`i',8] = r(sd)
	
		qui su hetz
		mat storage[`i',4] = r(mean)
		mat storage[`i',9] = r(sd)
	
		qui su constant
		mat storage[`i',5] = r(mean)
		mat storage[`i',10] = r(sd)
	
		qui su archterm
		mat storage[`i',1] = r(mean)
		mat storage[`i',6] = r(sd)
	
		qui su garchterm
		mat storage[`i',2] = r(mean)
		mat storage[`i',7] = r(sd)
	
		* now create preds
		gen preds = exp(constant + hetx*1 + hetz*1) // init guess
		forv j = 1/100 {
			replace preds = garchterm*preds + exp(constant + hetx*1 + hetz*1) + 0*archterm // replace w/ past value to achieve stable GARCH prediction
		}
		* and get med and 95% CIs
		_pctile preds, p(2.5, 50, 97.5)
		mat storage[`i',12] = r(r1)
		mat storage[`i',11] = r(r2)
		mat storage[`i',13] = r(r3)
		su preds
		mat storage[`i',14] = r(sd)
		restore	
	}
	else {
		* do nothing. no convergence
	}
}
mat list storage
clear
svmat storage
* 1. estimated B-bar, ARCH term
rename storage1 archterm_mean
* 2. estimated B-bar, GARCh term
rename storage2 garchterm_mean
* 3. estimated B-bar, Het X term
rename storage3 hetx_mean
* 4. estimated B-bar, Het Z term
rename storage4 hetz_mean
* 5. estimated B-bar, Het constant
rename storage5 constant_mean
* 6. estimated B-sd, ARCH term
rename storage6 archterm_sd
* 7. estimated B-sd, GARCh term
rename storage7 garchterm_sd
* 8. estimated B-sd, Het X term
rename storage8 hetx_sd
* 9. estimated B-sd, Het Z term
rename storage9 hetz_sd
* 10. estimated B-sd, Het constant
rename storage10 constant_sd
* 11. median prediction
rename storage11 pred_median_PBS
* 12. ll prediction
rename storage12 pred_ll_PBS
* 13. ul prediction
rename storage13 pred_ul_PBS
* 14. sd prediction
rename storage14 pred_sd
gen sims = _n
compress
save "data/dgp_scenario1_PBSPRED.dta", replace
* -----------------------------------------------------------------------
*


*
* -----------------------------------------------------------------------
* -->	STEP 4: MAXIMUM ENTROPY BOOTSTRAP
* 
* -----------------------------------------------------------------------
mat storage = J(`dgp_sims',14,.) // cols are:
* 1. estimated B-bar, ARCH term
* 2. estimated B-bar, GARCh term
* 3. estimated B-bar, Het X term
* 4. estimated B-bar, Het Z term
* 5. estimated B-bar, Het constant
* 6. estimated B-sd, ARCH term
* 7. estimated B-sd, GARCh term
* 8. estimated B-sd, Het X term
* 9. estimated B-sd, Het Z term
* 10. estimated B-sd, Het constant
* 11. median prediction
* 12. ll prediction
* 13. ul prediction
* 14. sd prediction

set seed 1245039932
* Run rscript.
* ARGS are: number in the boot (e.g., y_`1' boots y_1), dataset to boot from, length of T, number of reps
forv i = 1/`dgp_sims' {
	rscript using "scripts-programs/me-call.R", args("`i'" "dgp_scenario1.dta" "250" "`rep_sims'")

	* now load in the generated data:
	import delimited "me-draws.csv", clear 
	tsset time
	set obs 2000
	* create empty slots to hold betas:
	foreach name in hetx hetz constant archterm garchterm {
		gen `name' = .
	}

	* now for each of the synthetic Y's created for this particular y_`i', we'll run b GARCH models (the OG vars are dynamically updated via rscript)
	forv b = 1/`rep_sims' {
		cap arch meboot_y_`b' l.meboot_y_`b' og_x og_z, arch(1) garch(1) het(og_x og_z)
	
		if "`e(converged)'" == "1" {
			* store betas
			replace hetx = _b[HET:og_x] in `b'
			replace hetz = _b[HET:og_z] in `b'
			replace constant = _b[HET:_cons] in `b'
			replace archterm = _b[ARCH:L.arch] in `b'
			replace garchterm = _b[ARCH:L.garch] in `b'
		}
		else {
			* do nothing. No convergence
		}
	}
	
	* summarize and place means/sd
	qui su hetx
	mat storage[`i',3] = r(mean)
	mat storage[`i',8] = r(sd)
	
	qui su hetz
	mat storage[`i',4] = r(mean)
	mat storage[`i',9] = r(sd)
	
	qui su constant
	mat storage[`i',5] = r(mean)
	mat storage[`i',10] = r(sd)
	
	qui su archterm
	mat storage[`i',1] = r(mean)
	mat storage[`i',6] = r(sd)
	
	qui su garchterm
	mat storage[`i',2] = r(mean)
	mat storage[`i',7] = r(sd)
	
	* now create preds
	gen preds = exp(constant + hetx*1 + hetz*1) // init guess
	forv j = 1/100 {
		replace preds = garchterm*preds + exp(constant + hetx*1 + hetz*1) + 0*archterm // replace w/ past value to achieve stable GARCH prediction
	}
	* and get med and 95% CIs
	_pctile preds, p(2.5, 50, 97.5)
	mat storage[`i',12] = r(r1)
	mat storage[`i',11] = r(r2)
	mat storage[`i',13] = r(r3)
	su preds
	mat storage[`i',14] = r(sd)
	
}

mat list storage
clear
svmat storage
* 1. estimated B-bar, ARCH term
rename storage1 archterm_mean
* 2. estimated B-bar, GARCh term
rename storage2 garchterm_mean
* 3. estimated B-bar, Het X term
rename storage3 hetx_mean
* 4. estimated B-bar, Het Z term
rename storage4 hetz_mean
* 5. estimated B-bar, Het constant
rename storage5 constant_mean
* 6. estimated B-sd, ARCH term
rename storage6 archterm_sd
* 7. estimated B-sd, GARCh term
rename storage7 garchterm_sd
* 8. estimated B-sd, Het X term
rename storage8 hetx_sd
* 9. estimated B-sd, Het Z term
rename storage9 hetz_sd
* 10. estimated B-sd, Het constant
rename storage10 constant_sd
* 11. median prediction
rename storage11 pred_median_PBS
* 12. ll prediction
rename storage12 pred_ll_PBS
* 13. ul prediction
rename storage13 pred_ul_PBS
* 14. sd prediction
rename storage14 pred_sd
gen sims = _n
compress
save "data/dgp_scenario1_MEPRED.dta", replace

* -----------------------------------------------------------------------
* -->	STEP 5: RESIDUAL BOOTSTRAP
* run residual bootstrap program
do "scripts-programs/bootr2.ado" // run resid bootstrap program
do "scripts-programs/bootrone.ado" // run resid bootstrap program
loc dgp_sims = 500 // how many draws from the DGP are we taking?
loc rep_sims = 500 // how many reps OF a dgp_sim do we want to take?

mat storage = J(`dgp_sims',14,.) // cols are:
* 1. estimated B-bar, ARCH term
* 2. estimated B-bar, GARCh term
* 3. estimated B-bar, Het X term
* 4. estimated B-bar, Het Z term
* 5. estimated B-bar, Het constant
* 6. estimated B-sd, ARCH term
* 7. estimated B-sd, GARCh term
* 8. estimated B-sd, Het X term
* 9. estimated B-sd, Het Z term
* 10. estimated B-sd, Het constant
* 11. median prediction
* 12. ll prediction
* 13. ul prediction
* 14. sd prediction

use "dgp_scenario1.dta", clear
tsset t
set seed 45323323
set obs 2000 // just to make sure we have enough room

gen _sigma2 = . // for storing sigma2. It gets replaced every loop 
forv b = 1/`rep_sims' { // blank vector of bs created y 
	gen y_b`b' = .
}
gen preds = . // for storing predictions

* create empty slots to hold estimated betas:
	foreach name in hetx hetz constant archterm garchterm {
		gen `name' = .
}

forv i = 1/`dgp_sims' {
	* Estimate GARCH model
	cap arch y_`i' l.y_`i' x_`i' z_`i', arch(1) garch(1) het(x_`i' z_`i')
	
	if "`e(converged)'" == "1" {
		cap drop nu
		cap drop sigma2
		cap drop epsilon
		predict nu, resid // nu_t = \sigma_t\epsilon_t
		predict sigma2, variance // sigma^2_t
		gen epsilon = nu/sqrt(sigma2) // rescale, nu_t/\sigma_t = \epsilon_t
		su epsilon, meanonly // mean-center
		replace epsilon = epsilon-r(mean)
	
		* loop across bootstraps:
		forv b = 1/`rep_sims' {
			* Create initial values of y and sigma2 at t=0. Previously we were trying a burn-in process but that meant we lost an additional observation
		
			* draw residuals:
			bootrone, resid(epsilon) // this will be the t=0 error which we need for sigma2
			loc residvalue = r(residvalue) // only saving as loc b/c of repeat call below
			* resid for t=1 error for y
			bootrone, resid(epsilon) 
			loc residvaluey = r(residvalue)

		
			* independently from resid, draw bs at same time for y and sigma2
			bootrone, yvalue(y_`i') sigma2value(sigma2)
		
			* init sigma2 at t=1
			replace _sigma2 = _b[ARCH:L.arch]*`residvalue'^2*r(sigma2init) + ///
				_b[ARCH:L.garch]*r(sigma2init) + exp(_b[HET:_cons] + ///
				_b[HET:x_`i']*x_`i' + _b[HET:z_`i']*z_`i') in 1
			* also sigma2 at t=2 since we need `residvaluey'
			replace _sigma2 = _b[ARCH:L.arch]*`residvaluey'^2*l._sigma2 + ///
				_b[ARCH:L.garch]*l._sigma2 + exp(_b[HET:_cons] + ///
				_b[HET:x_`i']*x_`i' + _b[HET:z_`i']*z_`i') in 2
			
			* init y at t=1
			replace y_b`b' = _b[y_`i':_cons] + _b[y_`i':L.y_`i']*r(yinit) + ///
				_b[y_`i':x_`i']*x_`i' + _b[y_`i':z_`i']*z_`i' + ///
				sqrt(_sigma2)*`residvaluey' in 1
		
			* pull bootstrap residuals for t=2/T
			cap drop boot_e
			bootr2, old(epsilon) newname(boot_e) minT(2)
		
			* now recursively generate sigma2 and y for 2/T
			replace _sigma2 = _b[ARCH:L.arch]*l.boot_e^2*l._sigma2 + ///
				_b[ARCH:L.garch]*l._sigma2 + exp(_b[HET:_cons] + ///
				_b[HET:x_`i']*x_`i' + _b[HET:z_`i']*z_`i') in 3/L
			
			replace y_b`b' = _b[y_`i':_cons] + _b[y_`i':L.y_`i']*l.y_b`b' + ///
				_b[y_`i':x_`i']*x_`i' + _b[y_`i':z_`i']*z_`i' + sqrt(_sigma2)*boot_e in 2/L
		}
	
		* now that everything's generated for y_`i', estimate `b' bootstrap garch's for it:
		forv b = 1/`rep_sims' {
			* now estimate GARCH for this
			cap arch y_b`b' l.y_b`b' x_`i' z_`i', arch(1) garch(1) het(x_`i' z_`i')
		
			if "`e(converged)'" == "1" {
				* store betas
				replace hetx = _b[HET:x_`i'] in `b'
				replace hetz = _b[HET:z_`i'] in `b'
				replace constant = _b[HET:_cons] in `b'
				replace archterm = _b[ARCH:L.arch] in `b'
				replace garchterm = _b[ARCH:L.garch] in `b'
			}
			else {
				* do nothing. No convergence
			}
		}
	
		* summarize and place means/sd
		qui su hetx
		mat storage[`i',3] = r(mean)
		mat storage[`i',8] = r(sd)
	
		qui su hetz
		mat storage[`i',4] = r(mean)
		mat storage[`i',9] = r(sd)
	
		qui su constant
		mat storage[`i',5] = r(mean)
		mat storage[`i',10] = r(sd)
	
		qui su archterm
		mat storage[`i',1] = r(mean)
		mat storage[`i',6] = r(sd)
	
		qui su garchterm
		mat storage[`i',2] = r(mean)
		mat storage[`i',7] = r(sd)	
	
	
		* now create preds
		replace preds = exp(constant + hetx*1 + hetz*1) // init guess
		forv j = 1/100 {
			replace preds = garchterm*preds + exp(constant + hetx*1 + hetz*1) + ///
				0*archterm // replace w/ past value to achieve stable GARCH prediction
		}
		* and get med and 95% CIs
		_pctile preds, p(2.5, 50, 97.5)
		mat storage[`i',12] = r(r1)
		mat storage[`i',11] = r(r2)
		mat storage[`i',13] = r(r3)
		su preds
		mat storage[`i',14] = r(sd)
	} // successful first ARCH model
	
	else {
		* do nothing...failed ARCH 
	}
	
} // close dgp_sims loop

mat list storage
clear
svmat storage
* 1. estimated B-bar, ARCH term
rename storage1 archterm_mean
* 2. estimated B-bar, GARCh term
rename storage2 garchterm_mean
* 3. estimated B-bar, Het X term
rename storage3 hetx_mean
* 4. estimated B-bar, Het Z term
rename storage4 hetz_mean
* 5. estimated B-bar, Het constant
rename storage5 constant_mean
* 6. estimated B-sd, ARCH term
rename storage6 archterm_sd
* 7. estimated B-sd, GARCh term
rename storage7 garchterm_sd
* 8. estimated B-sd, Het X term
rename storage8 hetx_sd
* 9. estimated B-sd, Het Z term
rename storage9 hetz_sd
* 10. estimated B-sd, Het constant
rename storage10 constant_sd
* 11. median prediction
rename storage11 pred_median_PBS
* 12. ll prediction
rename storage12 pred_ll_PBS
* 13. ul prediction
rename storage13 pred_ul_PBS
* 14. sd prediction
rename storage14 pred_sd
gen sims = _n
compress
save "data/dgp_scenario1_RESIDPRED.dta", replace	
	
